package cn.chen.utils;

import org.apache.commons.math3.stat.descriptive.rank.Percentile;

import java.util.ArrayList;
import java.util.List;

/**
 * Data-cleaning helpers.
 */
public class DataCleanerUtils {

    /** Percentile used for the first quartile (Q1). */
    private static final double FIRST_QUARTILE = 25;

    /** Percentile used for the third quartile (Q3). */
    private static final double THIRD_QUARTILE = 75;

    /** Multiplier applied to the interquartile range to position the fences. */
    private static final double IQR_MULTIPLIER = 1.5;

    /** Static utility holder; not meant to be instantiated. */
    private DataCleanerUtils() {
    }

    /**
     * Detects outliers using the interquartile-range (IQR) rule.
     *
     * <p>Q1 and Q3 are computed with the {@code R_7} percentile estimation type.
     * A value is reported as an outlier when it lies strictly below
     * {@code Q1 - 1.5 * IQR} or strictly above {@code Q3 + 1.5 * IQR},
     * where {@code IQR = Q3 - Q1}.
     *
     * @param data the samples to inspect; must not be {@code null}
     * @return the outlying values in the order they appear in {@code data}
     *         (duplicates are kept); an empty list when {@code data} is empty
     *         or contains no outliers
     * @throws IllegalArgumentException if {@code data} is {@code null}
     */
    public static List<Double> detectOutliers(double[] data) {
        if (data == null) {
            throw new IllegalArgumentException("data must not be null");
        }
        List<Double> outliers = new ArrayList<>();
        if (data.length == 0) {
            // No samples means no quartiles can be computed and nothing can be an outlier.
            return outliers;
        }

        Percentile percentile = new Percentile().withEstimationType(Percentile.EstimationType.R_7);
        double q1 = percentile.evaluate(data, FIRST_QUARTILE);
        double q3 = percentile.evaluate(data, THIRD_QUARTILE);
        double iqr = q3 - q1;

        // Both fences are required: checking only the upper one misses abnormally small values.
        double lowerBound = q1 - IQR_MULTIPLIER * iqr;
        double upperBound = q3 + IQR_MULTIPLIER * iqr;

        for (double val : data) {
            if (val < lowerBound || val > upperBound) {
                outliers.add(val);
            }
        }
        return outliers;
    }
}